rstan_options(auto_write=TRUE)
options(mc.cores=3)

# interaction model with candidate random effects
model.interact <- "
data{
int N;
int K;

int C;
int<lower=1, upper=C> Cand[N];

int P;
int<lower=1, upper=P> Pty[N];

vector<lower=0, upper=100>[N] y;

matrix[N,K] X;

int M;
vector[M] logku_ncand;
}

parameters{
real alpha;
vector[K] beta;
vector[C] nu_c;
vector[P] nu_p;

real<lower=0, upper=100> sigma_y;
real<lower=0, upper=100> sigma_c;
real<lower=0, upper=100> sigma_p;
}

model{
sigma_y ~ uniform(0, 100);
sigma_c ~ uniform(0, 100);
sigma_p ~ uniform(0, 100);

y ~ normal(alpha + X*beta + nu_c[Cand] + nu_p[Pty], sigma_y);

nu_c ~ normal(0, sigma_c);
nu_p ~ normal(0, sigma_p);
}

generated quantities{
vector[M] dydx;
dydx = beta[1] + beta[2]*logku_ncand;
}
"

stanModel.interact <- stan_model(model_code=model.interact)




load("JHRED2014_New.RData")




###
### sntv vote share interaction (models 1 and 2 in Table H.1)
###

# election-year matrix
year_list <- sort(unique(sntv$year))
year.mat.sntv <- matrix(0, nrow=nrow(sntv), ncol=length(year_list)-1)

for(i in 2:length(year_list)){
  year.mat.sntv[sntv$year==year_list[i],i-1] <- 1
}

# explanatory variable matrix
# kakusu = name complexity (# of strokes)
x.mat.sntv.int1 <- cbind(sntv$kakusu, sntv$kakusu*log(sntv$ku_ncand), sntv$length,
                         log(sntv$ku_ncand), sntv$ku_m, year.mat.sntv)

sntv.dat.int1 <- list(N=nrow(sntv),
                      K=ncol(x.mat.sntv.int1),
                      y=sntv$voteshare,
                      X=x.mat.sntv.int1,
                      C=length(unique(sntv$candID)),
                      Cand=sntv$candID,
                      P=length(unique(sntv$ptyID)),
                      Pty=sntv$ptyID,
                      M=13,
                      logku_ncand=log(2:14))

sntv.all.intr1 <- sampling(stanModel.interact, sntv.dat.int1, iter=3000, warmup=1000, 
                           thin=1, chains=3, seed=1234)

						   
# kakusu_sd = standard deviation of name complexity in the district
x.mat.sntv.int2 <- cbind(sntv$kakusu, sntv$kakusu*sntv$kakusu_sd, sntv$length, 
                         log(sntv$ku_ncand), sntv$ku_m, sntv$kakusu_sd, year.mat.sntv)

sntv.dat.int2 <- list(N=nrow(sntv),
                      K=ncol(x.mat.sntv.int2),
                      y=sntv$voteshare,
                      X=x.mat.sntv.int2,
                      C=length(unique(sntv$candID)),
                      Cand=sntv$candID,
                      P=length(unique(sntv$ptyID)),
                      Pty=sntv$ptyID,
                      M=length(0:15),
                      logku_ncand=0:15)

sntv.all.intr2 <- sampling(stanModel.interact, sntv.dat.int2, iter=3000, warmup=1000, 
                           thin=1, chains=3, seed=1234)




###
### smdp vote share interaction (models 3 and 4 in Table H.1)
###

# election-year matrix
year_list <- sort(unique(smdp$year))
year.mat.smdp <- matrix(0, nrow=nrow(smdp), ncol=length(year_list)-1)

for(i in 2:length(year_list)){
  year.mat.smdp[smdp$year==year_list[i],i-1] <- 1
}

# explanatory variable matrix
# kakusu = name complexity (# of strokes)
x.mat.smdp.int1 <- cbind(smdp$kakusu, smdp$kakusu*log(smdp$ku_ncand), smdp$length,
                         log(smdp$ku_ncand), year.mat.smdp)

smdp.dat.int1 <- list(N=nrow(smdp),
                      K=ncol(x.mat.smdp.int1),
                      y=smdp$voteshare,
                      X=x.mat.smdp.int1,
                      C=length(unique(smdp$candID)),
                      Cand=smdp$candID,
                      P=length(unique(smdp$ptyID)),
                      Pty=smdp$ptyID,
                      M=4,
                      logku_ncand=log(2:5))

smdp.all.intr1 <- sampling(stanModel.interact, smdp.dat.int1, iter=3000, warmup=1000, 
                           thin=1, chains=3, seed=1234)

						   
# kakusu_sd = standard deviation of name complexity in the district
x.mat.smdp.int2 <- cbind(smdp$kakusu, smdp$kakusu*smdp$kakusu_sd, smdp$length, 
                         log(smdp$ku_ncand), smdp$kakusu_sd, year.mat.smdp)

smdp.dat.int2 <- list(N=nrow(smdp),
                      K=ncol(x.mat.smdp.int2),
                      y=smdp$voteshare,
                      X=x.mat.smdp.int2,
                      C=length(unique(smdp$candID)),
                      Cand=smdp$candID,
                      P=length(unique(smdp$ptyID)),
                      Pty=smdp$ptyID,
                      M=length(0:15),
                      logku_ncand=0:15)

smdp.all.intr2 <- sampling(stanModel.interact, smdp.dat.int2, iter=3000, warmup=1000, 
                           thin=1, chains=3, seed=1234)

#save(sntv.all.intr1, sntv.all.intr2, smdp.all.intr1, smdp.all.intr2, 
#     file="StanFits_Interaction.RData")




###
### summary
###
library(rstan)
#load("StanFits_Interaction.RData")

# extract marginal effect of name complexity conditional on 
# the number of candidates in the district
sntv.me1 <- extract(sntv.all.intr1)["dydx"][[1]]
sntv.mean1 <- apply(sntv.me1, 2, mean)
sntv.lwr1 <- apply(sntv.me1, 2, quantile, 0.025)
sntv.upr1 <- apply(sntv.me1, 2, quantile, 0.975)

# Figure 2
#pdf("name_me_sntv1.pdf", height=6, width=7)
par(mar=c(5.1,4.1,2.1,2.1))
plot(log(2:14), log(2:14), type="n", ylim=c(min(sntv.lwr1), max(sntv.upr1)),
     ylab="Marginal Effect of Name Complexity", xlab="log(Number of Candidates)",
     xaxt="n")
axis(1, at=log(2:14), labels=rep("", times=13))
axis(1, at=log(c(2,3,7,14)), labels=paste0("log(", c(2,3,7,14), ")"))
abline(h=0, lty=1, lwd=0.5)
lines(log(2:14), sntv.mean1, lwd=1.5)
lines(log(2:14), sntv.lwr1, lty=2, lwd=1.5)
lines(log(2:14), sntv.upr1, lty=2, lwd=1.5)
#dev.off()


# Table H.1 in the appendix
summary(sntv.all.intr1, pars=c("beta[1]", "beta[2]", "beta[3]", "beta[4]", "beta[5]",
                               "sigma_c", "sigma_p"))$summary[,c(1,3)]
summary(sntv.all.intr2, pars=c("beta[1]", "beta[2]", "beta[3]", "beta[4]", "beta[5]", "beta[6]",
                               "sigma_c", "sigma_p"))$summary[,c(1,3)]
summary(smdp.all.intr1, pars=c("beta[1]", "beta[2]", "beta[3]", "beta[4]",
                               "sigma_c", "sigma_p"))$summary[,c(1,3)]
summary(smdp.all.intr2, pars=c("beta[1]", "beta[2]", "beta[3]", "beta[4]", "beta[5]",
                               "sigma_c", "sigma_p"))$summary[,c(1,3)]

							   
# extract marginal effect of name complexity conditional on 
# the standard deviation of complexity scores in the district
sntv.me2 <- extract(sntv.all.intr2)["dydx"][[1]]
sntv.mean2 <- apply(sntv.me2, 2, mean)
sntv.lwr2 <- apply(sntv.me2, 2, quantile, 0.025)
sntv.upr2 <- apply(sntv.me2, 2, quantile, 0.975)

# Figure H.1 in the appendix
#pdf("name_me_sntv2.pdf", height=6, width=7)
par(mar=c(5.1,4.1,2.1,2.1))
plot(0:15, 0:15, type="n", ylim=c(min(sntv.lwr2), max(sntv.upr2)),
     ylab="Marginal Effect of Name Complexity", 
     xlab="District-Level Standard Deviation of Name Complexity")
abline(h=0, lty=1, lwd=0.5)
lines(0:15, sntv.mean2, lwd=1.5)
lines(0:15, sntv.lwr2, lty=2, lwd=1.5)
lines(0:15, sntv.upr2, lty=2, lwd=1.5)
#dev.off()


# extract marginal effect of name complexity conditional on 
# the number of candidates in the district
smdp.me1 <- extract(smdp.all.intr1)["dydx"][[1]]
smdp.mean1 <- apply(smdp.me1, 2, mean)
smdp.lwr1 <- apply(smdp.me1, 2, quantile, 0.025)
smdp.upr1 <- apply(smdp.me1, 2, quantile, 0.975)

# extract marginal effect of name complexity conditional on 
# the standard deviation of complexity scores in the district
smdp.me2 <- extract(smdp.all.intr2)["dydx"][[1]]
smdp.mean2 <- apply(smdp.me2, 2, mean)
smdp.lwr2 <- apply(smdp.me2, 2, quantile, 0.025)
smdp.upr2 <- apply(smdp.me2, 2, quantile, 0.975)

# Figure H.2 in the appendix
#pdf("name_me_smdp.pdf", height=6, width=11)
par(mfrow=c(1,2))
plot(log(2:14), log(2:14), type="n", ylim=c(min(smdp.lwr2), max(smdp.upr1)),
     ylab="Marginal Effect of Name Complexity", xlab="log(Number of Candidates)",
     xaxt="n", main="Heterogeneous Effect by the Number of Candidates")
axis(1, at=log(2:14), labels=rep("", times=13))
axis(1, at=log(c(2,3,4,5)), labels=paste0("log(", c(2,3,4,5), ")"))
abline(h=0, lty=1, lwd=0.5)
lines(log(2:5), smdp.mean1, lwd=1.5)
lines(log(2:5), smdp.lwr1, lty=2, lwd=1.5)
lines(log(2:5), smdp.upr1, lty=2, lwd=1.5)

plot(0:15, 0:15, type="n", ylim=c(min(smdp.lwr2), max(smdp.upr1)),
     ylab="Marginal Effect of Name Complexity", 
     xlab="District-Level Standard Deviation of Name Complexity",
     main="Heterogeneous Effect by the SD of Name Complexity")
abline(h=0, lty=1, lwd=0.5)
lines(0:15, smdp.mean2, lwd=1.5)
lines(0:15, smdp.lwr2, lty=2, lwd=1.5)
lines(0:15, smdp.upr2, lty=2, lwd=1.5)
#dev.off()